1.1. Notebook2
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
# Load the housing data from the local CSV
# (source: https://github.com/ageron/data/tree/main/housing).
housing = pd.read_csv("./housing.csv")

# Bin median_income into five labelled categories; the resulting column is
# used only as the stratification key for the split below.
housing["income_cat"] = pd.cut(
    housing["median_income"],
    bins=[0, 1.5, 3.0, 4.5, 6, np.inf],
    labels=[1, 2, 3, 4, 5],
)

# Hold out 20% of the rows as the test set, stratified on income_cat, with a
# fixed seed so the split is reproducible.
strat_train_set, strat_test_set = train_test_split(
    housing,
    test_size=0.20,
    stratify=housing["income_cat"],
    random_state=42,
)

# The helper column has served its purpose; remove it from both subsets.
for set_ in (strat_train_set, strat_test_set):
    set_.drop("income_cat", axis=1, inplace=True)

# Separate the predictors from the target column of the training set.
housing = strat_train_set.drop("median_house_value", axis=1)
housing_labels = strat_train_set["median_house_value"].copy()
---------------------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
Cell In[1], line 2
1 import numpy as np
----> 2 import pandas as pd
3 from sklearn.model_selection import train_test_split
5 housing = pd.read_csv("./housing.csv") # https://github.com/ageron/data/tree/main/housing
ModuleNotFoundError: No module named 'pandas'
# Simple Feature Engineering via attribute combinations
from ydata_profiling import ProfileReport
# Each derived column is the element-wise ratio of two existing columns:
# new column name -> (numerator column, denominator column).
ratio_features = {
    "rooms_per_house": ("total_rooms", "households"),
    "bedrooms_ratio": ("total_bedrooms", "total_rooms"),
    "people_per_house": ("population", "households"),
}
for new_column, (numerator, denominator) in ratio_features.items():
    housing[new_column] = housing[numerator].div(housing[denominator])

# Build the profiling report over the augmented frame; leaving `profile` as
# the final expression makes the notebook render it as the cell output.
profile = ProfileReport(housing, title="Pandas Profiling Report")
profile